
********************************************************
******** CREATE OVERLAPPING DIRECTORS DATASET: YEAR-GVKEY1-GVKEY2  *************
********************************************************

/*

This do-file creates a dataset with each pair of firms that share a director 
in each year.  

1) It uses the file "boardex_empl.dta" created with the do-file "boardex_prep".
2) Every year, for each GVKEY, it creates a list of all GVKEYs that have the same director. 
The firm of interest is called GVKEY1 and the others are called GVKEY2.
It repeats this for every director of GVKEY1.
3) It keeps the ID of each overlapping director (OD). If there is more than one OD, it keeps each ID. 
It also keeps the number of boards on which each OD sits.
4) It puts all years together.
5) It does the same for the ISS dataset. 
6) It merges both datasets together and creates the dataset called "overlapping_directors.dta". 
It also creates alternative versions of that dataset lagged by 1, 2, 3 and 4 years.

*/

* Anchor every path at the current directory (where the user placed "replication_package")
cd "`c(pwd)'"

* Project folders, all expressed relative to the main directory
global main_dir "`c(pwd)'"
global tables_dir "${main_dir}/tables"
global rawdata_dir "${main_dir}/data_raw"
global data_dir "${main_dir}/data_analysis"

* Work inside the analysis-data folder from here on
cd "${data_dir}/"


** YEAR 1989
* Builds the firm-pair file for 1989: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1989
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 4 director slots this year's file has always had
local nshared = max(r(max), 4)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1989
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1989, replace

** YEAR 1990
* Builds the firm-pair file for 1990: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1990
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 4 director slots this year's file has always had
local nshared = max(r(max), 4)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1990
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1990, replace

** YEAR 1991
* Builds the firm-pair file for 1991: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1991
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 6 director slots this year's file has always had
local nshared = max(r(max), 6)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1991
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1991, replace

** YEAR 1992
* Builds the firm-pair file for 1992: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1992
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 6 director slots this year's file has always had
local nshared = max(r(max), 6)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1992
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1992, replace

** YEAR 1993
* Builds the firm-pair file for 1993: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1993
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 7 director slots this year's file has always had
local nshared = max(r(max), 7)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1993
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1993, replace

** YEAR 1994
* Builds the firm-pair file for 1994: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1994
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 7 director slots this year's file has always had
local nshared = max(r(max), 7)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1994
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1994, replace

** YEAR 1995
* Builds the firm-pair file for 1995: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1995
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 11 director slots this year's file has always had
local nshared = max(r(max), 11)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1995
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1995, replace

** YEAR 1996
* Builds the firm-pair file for 1996: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1996
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 8 director slots this year's file has always had
local nshared = max(r(max), 8)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1996
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1996, replace


** YEAR 1997
* Builds the firm-pair file for 1997: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1997
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 8 director slots this year's file has always had
local nshared = max(r(max), 8)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1997
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1997, replace


** YEAR 1998
* Builds the firm-pair file for 1998: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1998
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 8 director slots this year's file has always had
local nshared = max(r(max), 8)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1998
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1998, replace


** YEAR 1999
* Builds the firm-pair file for 1999: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==1999
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 9 director slots this year's file has always had
local nshared = max(r(max), 9)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=1999
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_1999, replace


** YEAR 2000
* Builds the firm-pair file for 2000: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==2000
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 13 director slots this year's file has always had
local nshared = max(r(max), 13)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=2000
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_2000, replace


** YEAR 2001
* Builds the firm-pair file for 2001: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==2001
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 9 director slots this year's file has always had
local nshared = max(r(max), 9)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=2001
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_2001, replace


** YEAR 2002
* Builds the firm-pair file for 2002: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==2002
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 13 director slots this year's file has always had
local nshared = max(r(max), 13)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=2002
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_2002, replace


** YEAR 2003
* Builds the firm-pair file for 2003: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==2003
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 13 director slots this year's file has always had
local nshared = max(r(max), 13)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=2003
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_2003, replace


** YEAR 2004
* Builds the firm-pair file for 2004: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==2004
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 11 director slots this year's file has always had
local nshared = max(r(max), 11)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=2004
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_2004, replace


** YEAR 2005
* Builds the firm-pair file for 2005: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==2005
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 12 director slots this year's file has always had
local nshared = max(r(max), 12)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=2005
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_2005, replace


** YEAR 2006
* Builds the firm-pair file for 2006: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==2006
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 13 director slots this year's file has always had
local nshared = max(r(max), 13)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=2006
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_2006, replace


** YEAR 2007
* Builds the firm-pair file for 2007: one row per (gvkey1, gvkey2) joined by at
* least one director in boardex_empl, with the IDs of all shared directors
* (directorid1..K) and each one's number of boards (size_odlink1..K).
* NOTE(review): pairs are not put in a canonical order, so the same two firms can
* appear as both (A,B) and (B,A) via different directors - confirm downstream use.
use boardex_empl, clear
keep if year==2007
keep directorid gvkey
order directorid gvkey
sort directorid gvkey
* size = number of rows (boards) the director has in this year
by directorid: gen size = _N
by directorid: gen last = _n == _N
summarize size
* Loop bounds come from the data instead of a hand-copied maximum
local maxboards = cond(missing(r(max)), 0, r(max))

* Expand every director to size^2 rows (size blocks of size rows each)
gen b=(size*size)-size+1
expand b if last, generate(added)
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies first, original rows last; each pass fills one more block from the bottom
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
}
rename gvkey gvkey1
* double: a float holds integers exactly only up to 16,777,216
gen double gvkey2=.

by directorid: gen num = _n
* In block i, pair each firm with the firm i positions further down the director's list
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
}

* The first size*(size-1)/2 rows hold every unordered pair once
* (provided a director has no repeated gvkey within the year)
drop if num>size*(size-1)/2
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
* number of directors that companies share;
* ordering by directorid inside each pair makes directorid1..K reproducible
bysort pair (directorid): gen num=_N
summarize num
* Never create fewer than the 11 director slots this year's file has always had
local nshared = max(r(max), 11)

* create a director-ID variable for each shared director
forvalues i=2/`nshared' {
    by pair: gen double directorid`i' = directorid[_n+`i'-1]
}
rename directorid directorid1

* matching number of boards for each shared director
forvalues i=2/`nshared' {
    by pair: gen size_odlink`i' = size[_n+`i'-1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair WITHOUT re-sorting: only that row carries every
* shared director, and a fresh sort does not guarantee the order of tied rows
by pair: keep if _n==1
drop pair num
gen year=2007
order year gvkey1 gvkey2
sort gvkey1 gvkey2
save overlapping_directors_Boardex_2007, replace


** YEAR 2008
* Builds overlapping_directors_Boardex_2008: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2008, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2008
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 45 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 45) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 45)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 12 when this was written; never create fewer columns than that
local maxshared = max(r(max), 12)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2008
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2008, replace



** YEAR 2009
* Builds overlapping_directors_Boardex_2009: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2009, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2009
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 21 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 21) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 21)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 13 when this was written; never create fewer columns than that
local maxshared = max(r(max), 13)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2009
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2009, replace



** YEAR 2010
* Builds overlapping_directors_Boardex_2010: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2010, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2010
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 45 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 45) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 45)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 12 when this was written; never create fewer columns than that
local maxshared = max(r(max), 12)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2010
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2010, replace




** YEAR 2011
* Builds overlapping_directors_Boardex_2011: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2011, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2011
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 44 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 44) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 44)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 11 when this was written; never create fewer columns than that
local maxshared = max(r(max), 11)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2011
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2011, replace



** YEAR 2012
* Builds overlapping_directors_Boardex_2012: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2012, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2012
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 14 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 14) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 14)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 12 when this was written; never create fewer columns than that
local maxshared = max(r(max), 12)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2012
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2012, replace




** YEAR 2013
* Builds overlapping_directors_Boardex_2013: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2013, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2013
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 35 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 35) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 35)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 13 when this was written; never create fewer columns than that
local maxshared = max(r(max), 13)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2013
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2013, replace



** YEAR 2014
* Builds overlapping_directors_Boardex_2014: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2014, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2014
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 31 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 31) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 31)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 13 when this was written; never create fewer columns than that
local maxshared = max(r(max), 13)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2014
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2014, replace



** YEAR 2015
* Builds overlapping_directors_Boardex_2015: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2015, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2015
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 30 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 30) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 30)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 12 when this was written; never create fewer columns than that
local maxshared = max(r(max), 12)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2015
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2015, replace



** YEAR 2016
* Builds overlapping_directors_Boardex_2016: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2016, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2016
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 23 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 23) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 23)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 13 when this was written; never create fewer columns than that
local maxshared = max(r(max), 13)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2016
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2016, replace



** YEAR 2017
* Builds overlapping_directors_Boardex_2017: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2017, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2017
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 22 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 22) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 22)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 13 when this was written; never create fewer columns than that
local maxshared = max(r(max), 13)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2017
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2017, replace


** YEAR 2018
* Builds overlapping_directors_Boardex_2018: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2018, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2018
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 20 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 20) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 20)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 12 when this was written; never create fewer columns than that
local maxshared = max(r(max), 12)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2018
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2018, replace

** YEAR 2019
* Builds overlapping_directors_Boardex_2019: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in boardex_empl in 2019, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use boardex_empl, clear
keep if year==2019
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 6 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 6) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 6)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 9 when this was written; never create fewer columns than that
local maxshared = max(r(max), 9)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2019
order year gvkey1 gvkey2
save overlapping_directors_Boardex_2019, replace


** ALL YEARS TOGETHER:

* Start from the 1989 file and stack the yearly BoardEx pair files for
* 1990 through 2019 underneath it, in year order, then save the combined file.
use overlapping_directors_Boardex_1989, clear
forvalues yr = 1990/2019 {
    append using overlapping_directors_Boardex_`yr'
  }

save overlapping_directors_Boardex, replace



********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

*** ISS DIRECTORS

********************************************************************************
********************************************************************************


** YEAR 1996
* Builds overlapping_directors_iss_1996: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in iss_empl in 1996, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use iss_empl, clear
keep if year==1996
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 8 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 8) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 8)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 7 when this was written; never create fewer columns than that
local maxshared = max(r(max), 7)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=1996
order year gvkey1 gvkey2
save overlapping_directors_iss_1996, replace


** YEAR 1997
* Builds overlapping_directors_iss_1997: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in iss_empl in 1997, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use iss_empl, clear
keep if year==1997
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 8 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 8) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 8)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 8 when this was written; never create fewer columns than that
local maxshared = max(r(max), 8)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=1997
order year gvkey1 gvkey2
save overlapping_directors_iss_1997, replace


** YEAR 1998
* Builds overlapping_directors_iss_1998: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in iss_empl in 1998, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use iss_empl, clear
keep if year==1998
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 9 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 9) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 9)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 7 when this was written; never create fewer columns than that
local maxshared = max(r(max), 7)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=1998
order year gvkey1 gvkey2
save overlapping_directors_iss_1998, replace


** YEAR 1999
* Builds overlapping_directors_iss_1999: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in iss_empl in 1999, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use iss_empl, clear
keep if year==1999
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 10 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 10) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 10)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 8 when this was written; never create fewer columns than that
local maxshared = max(r(max), 8)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=1999
order year gvkey1 gvkey2
save overlapping_directors_iss_1999, replace


** YEAR 2000
* Builds overlapping_directors_iss_2000: one row per firm pair (gvkey1, gvkey2)
* that shares at least one director in iss_empl in 2000, with the IDs of the
* shared directors (directorid1, directorid2, ...) and the number of boards each
* of them sits on that year (size_odlink1, size_odlink2, ...).
* NOTE(review): the orientation of a pair comes from the cyclic shift below, so the
* same two firms can be stored as (A,B) via one director and as (B,A) via another;
* group(gvkey1 gvkey2) then treats them as two pairs. Confirm downstream code copes.
use iss_empl, clear
keep if year==2000
keep  directorid gvkey 
order  directorid gvkey 
sort  directorid gvkey 
bys directorid: gen num=_n
sum num
* max of 10 linked firms by one individual director when this was written.
* The loop bound is read from the data (never below 10) so that another data
* vintage cannot leave part of the expanded grid unfilled.
local maxboards = max(r(max), 10)
by directorid: gen last = _n == _N
by directorid: egen size = max(num) 
* Grow every director's block to size^2 rows by copying its last row
gen b=(size*size)-size+1
expand b if last, generate(added)
sort  directorid gvkey added
keep directorid gvkey added size
* I am keeping "size" because the director's network may be useful for future analysis
replace gvkey=. if added==1

* Blank copies go first and are filled, size rows per pass, from the rows below,
* so each director's sorted firm list ends up repeated size times
gsort directorid -added gvkey
forvalues i=1/`maxboards' {
    replace gvkey=gvkey[_n+size] if gvkey==.
  }
rename gvkey gvkey1
gen gvkey2=.

* In the i-th run of size rows, match each firm with the firm i rows further down
by directorid: gen num = _n
forvalues i=1/`maxboards' {
    by directorid: replace gvkey2=gvkey1[_n+`i'] if num>size*(`i'-1) & num<=size*(`i')
  }

* The first size*(size-1)/2 rows hold every unordered pair of the director's firms once
* (closed form of size!/(2*(size-2)!), without going through factorials)
gen comb=size*(size-1)/2
drop if num>comb
drop if gvkey2==.

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair=group(gvkey1 gvkey2)
*number of directors that companies share: 
* directorid is part of the sort key so the order of directors within a pair is reproducible
sort pair directorid
by pair: gen num=_N
* first marks the only row of each pair that will carry the complete director list
by pair: gen first = _n == 1
sum num
* max: 10 when this was written; never create fewer columns than that
local maxshared = max(r(max), 10)
* create a director-ID variable for each shared director
* (double, because a default float cannot hold IDs above 16,777,216 exactly)
forvalues i=2/`maxshared' {
    gen double directorid`i'=.
    replace directorid`i'= directorid[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename directorid directorid1

rename size size_odlink
forvalues i=2/`maxshared' {
    gen size_odlink`i'=.
    replace size_odlink`i'= size_odlink[_n+(`i'-1)] if pair==pair[_n+(`i'-1)]
  }
rename size_odlink size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep the first row of each pair. Re-sorting and dropping repeated pairs would keep
* an arbitrary row, because sort orders ties randomly, and lose shared directors.
keep if first
drop pair num first
sort gvkey1 gvkey2
gen year=2000
order year gvkey1 gvkey2
save overlapping_directors_iss_2000, replace


** YEAR 2001
* Builds overlapping_directors_iss_2001: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2001, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 8 boards per director,
* 10 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2001
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2001
order year gvkey1 gvkey2
save overlapping_directors_iss_2001, replace


** YEAR 2002
* Builds overlapping_directors_iss_2002: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2002, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 8 boards per director,
* 8 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2002
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2002
order year gvkey1 gvkey2
save overlapping_directors_iss_2002, replace


** YEAR 2003
* Builds overlapping_directors_iss_2003: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2003, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 8 boards per director,
* 8 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2003
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2003
order year gvkey1 gvkey2
save overlapping_directors_iss_2003, replace


** YEAR 2004
* Builds overlapping_directors_iss_2004: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2004, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 7 boards per director,
* 9 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2004
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2004
order year gvkey1 gvkey2
save overlapping_directors_iss_2004, replace


** YEAR 2005
* Builds overlapping_directors_iss_2005: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2005, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 7 boards per director,
* 10 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2005
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2005
order year gvkey1 gvkey2
save overlapping_directors_iss_2005, replace


** YEAR 2006
* Builds overlapping_directors_iss_2006: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2006, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 7 boards per director,
* 8 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2006
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2006
order year gvkey1 gvkey2
save overlapping_directors_iss_2006, replace


** YEAR 2007
* Builds overlapping_directors_iss_2007: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2007, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 6 boards per director,
* 8 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2007
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2007
order year gvkey1 gvkey2
save overlapping_directors_iss_2007, replace


** YEAR 2008
* Builds overlapping_directors_iss_2008: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2008, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 6 boards per director,
* 8 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2008
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2008
order year gvkey1 gvkey2
save overlapping_directors_iss_2008, replace


** YEAR 2009
* Builds overlapping_directors_iss_2009: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2009, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 6 boards per director,
* 7 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2009
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2009
order year gvkey1 gvkey2
save overlapping_directors_iss_2009, replace


** YEAR 2010
* Builds overlapping_directors_iss_2010: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2010, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 5 boards per director,
* 7 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2010
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2010
order year gvkey1 gvkey2
save overlapping_directors_iss_2010, replace


** YEAR 2011
* Builds overlapping_directors_iss_2011: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2011, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 5 boards per director,
* 5 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2011
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2011
order year gvkey1 gvkey2
save overlapping_directors_iss_2011, replace


** YEAR 2012
* Builds overlapping_directors_iss_2012: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2012, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 5 boards per director,
* 7 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2012
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2012
order year gvkey1 gvkey2
save overlapping_directors_iss_2012, replace


** YEAR 2013
* Builds overlapping_directors_iss_2013: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2013, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 5 boards per director,
* 8 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2013
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2013
order year gvkey1 gvkey2
save overlapping_directors_iss_2013, replace


** YEAR 2014
* Builds overlapping_directors_iss_2014: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2014, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 5 boards per director,
* 8 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2014
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2014
order year gvkey1 gvkey2
save overlapping_directors_iss_2014, replace


** YEAR 2015
* Builds overlapping_directors_iss_2015: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2015, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 5 boards per director,
* 8 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2015
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2015
order year gvkey1 gvkey2
save overlapping_directors_iss_2015, replace


** YEAR 2016
* Builds overlapping_directors_iss_2016: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2016, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 5 boards per director,
* 4 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2016
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2016
order year gvkey1 gvkey2
save overlapping_directors_iss_2016, replace


** YEAR 2017
* Builds overlapping_directors_iss_2017: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2017, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 5 boards per director,
* 4 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2017
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2017
order year gvkey1 gvkey2
save overlapping_directors_iss_2017, replace


** YEAR 2018
* Builds overlapping_directors_iss_2018: one row per firm pair (gvkey1, gvkey2)
* linked by at least one ISS director in 2018, with each shared director's ID
* (directorid1, directorid2, ...) and board count (size_odlink1, ...).
* Maxima noted by the original author for this year: 5 boards per director,
* 3 shared directors per pair. All bounds below are now read from the data.
use iss_empl, clear
keep if year==2018
keep directorid gvkey
order directorid gvkey

* size = number of firm rows the director has this year
bysort directorid (gvkey): gen size = _N
by directorid: gen byte last = _n == _N
summarize size

* Pad each director to size^2 rows by copying the last row
gen b = (size*size) - size + 1
expand b if last, generate(added)
keep directorid gvkey added size
rename gvkey gvkey1

* Original rows sort first, so row p can take the firm at cyclic position
* 1 + mod(p-1, size): the block reads A B C A B C ... for firms A<B<C
bysort directorid (added gvkey1): replace gvkey1 = gvkey1[1 + mod(_n - 1, size)]
* Rows in the k-th run of size rows are paired with the firm k rows ahead
by directorid: gen gvkey2 = gvkey1[_n + ceil(_n/size)]
* The first size*(size-1)/2 rows contain every unordered pair exactly once
by directorid: drop if _n > size*(size - 1)/2
drop if gvkey2==.
* NOTE(review): pair orientation follows the cyclic offsets, so two firms can
* appear as (A,C) through one director and (C,A) through another -- confirm intended

keep directorid gvkey1 gvkey2 size
order gvkey1 gvkey2 directorid size
egen pair = group(gvkey1 gvkey2)

* number of directors that companies share
bysort pair (directorid): gen nshared = _N
summarize nshared
local maxshared = max(r(max), 1)

* Put the 2nd, 3rd, ... shared director (and board count) on the pair's first row;
* double keeps IDs exact even if they exceed float precision
forvalues k = 2/`maxshared' {
    by pair: gen double directorid`k' = directorid[_n + `k' - 1]
}
rename directorid directorid1
forvalues k = 2/`maxshared' {
    by pair: gen size_odlink`k' = size[_n + `k' - 1]
}
rename size size_odlink1
label var size_odlink1 "number of boards of directorid1"

* Keep one row per pair while still in the sorted order: re-sorting first (as
* before) orders ties arbitrarily and could keep a row with a partial list
by pair: keep if _n == 1
drop pair nshared
sort gvkey1 gvkey2
gen year=2018
order year gvkey1 gvkey2
save overlapping_directors_iss_2018, replace


** ALL YEARS TOGETHER
* Stack the yearly ISS pair files, 1996 through 2018, into a single dataset

use overlapping_directors_iss_1996, clear
forvalues yr = 1997/2018 {
    append using overlapping_directors_iss_`yr'
}

save overlapping_directors_iss, replace





***************************************************
***************************************************
***************************************************
***************************************************

** BOARDEX AND ISS

***************************************************
***************************************************

* Combine the BoardEx-based and ISS-based pair files into the final dataset.
use overlapping_directors_boardex, clear
append using overlapping_directors_iss
sort year gvkey1 gvkey2
save overlapping_directors, replace 

* Lagged versions: for each lag of 1 to 4 years, shift "year" forward by the
* lag and suffix the 13 directorid/size_odlink columns with l1, l2, l3 or l4,
* then save the result as overlapping_directorsl1 ... overlapping_directorsl4.
forvalues lag=1/4 {
    use overlapping_directors, clear
    replace year=year+`lag'
    forvalues k=1/13 {
        rename directorid`k' directorid`k'l`lag'
        rename size_odlink`k' size_odlink`k'l`lag'
      }
    save overlapping_directorsl`lag', replace 
  }

